import numpy as np
import pandas as pd


class FileReader:
    """
        Load one or two columns of a delimited text file as numpy arrays.

    :param file: name of the file, looked up inside ``data_dir``
    :param x_header: column header used as x (or as y when ``y_header``
        is None, see :meth:`read`)
    :param y_header: column header used as y; optional
    :param data_dir: directory that contains ``file``; the default
        ``'../data'`` is relative to the current working directory
    :param sep: field separator passed to pandas; defaults to ``'|'``
    """

    def __init__(self, file, x_header, y_header=None, data_dir='../data', sep='|'):
        self.file = file
        self.x_header = x_header
        self.y_header = y_header
        self.data_dir = data_dir
        self.sep = sep

    def read(self, order_by=False):
        """
            Read the file and return the selected columns as arrays.

            When ``y_header`` was given, x is the ``x_header`` column and y
            is the ``y_header`` column. When ``y_header`` is None, x is the
            row index of the loaded table and y is the ``x_header`` column.

        :param order_by: if truthy, sort the rows by the ``x_header`` column
            and renumber the index before extracting the arrays
        :return: tuple ``(x, y, total_size)`` where x has shape (n, 1),
            y is the 1-D column values and total_size is the number of rows
        """
        # Plain '/' join so the default still yields '../data/<file>'.
        file_path = '{}/{}'.format(self.data_dir, self.file)
        df = pd.read_table(file_path, sep=self.sep)
        if order_by:
            # Renumber after sorting so the index follows the sorted order.
            df = df.sort_values(self.x_header).reset_index(drop=True)
        total_size = df.shape[0]

        if self.y_header is not None:
            x = df[self.x_header].values.reshape(-1, 1)
            y = df[self.y_header].values
        else:
            # No y column requested: the row index serves as x.
            x = np.array(df.index.tolist()).reshape(-1, 1)
            y = df[self.x_header].values
        return x, y, total_size


if __name__ == "__main__":
    # Demo run: load price vs. priority from the orders table and print
    # the x array, the y array and the row count, one after another.
    reader = FileReader(
        "orders1g.tbl",
        x_header='o_totalprice',
        y_header='o_orderpriority',
    )
    for result in reader.read():
        print(result)
